ESM 244 Lab 3 pt. 1: working with spatial data

Author

Caroline Edmonds

Code
library(tidyverse)
library(janitor)
library(here)
library(broom)

# Spatial Packages
library(sf)
library(tmap)

0.1 Read in our data.

Code
ca_counties_row_sf <- read_sf(here('data', 'ca_counties', 'CA_Counties_TIGER2016.shp'))
Code
#create new sf dataframe
ca_counties_sf <- ca_counties_row_sf |>
  clean_names() |>
  mutate(land_km2 = aland/1e6) |>
  select(county = name, land_km2)

#geometry is a sticky variable and will automatically keep that column

#to get rid of
ca_counties_df <- ca_counties_row_sf |>
  as.data.frame() |>
  select(-geometry)

0.2 Check the coordinate reference system.

Code
ca_counties_sf |> st_crs()
Coordinate Reference System:
  User input: WGS 84 / Pseudo-Mercator 
  wkt:
PROJCRS["WGS 84 / Pseudo-Mercator",
    BASEGEOGCRS["WGS 84",
        ENSEMBLE["World Geodetic System 1984 ensemble",
            MEMBER["World Geodetic System 1984 (Transit)"],
            MEMBER["World Geodetic System 1984 (G730)"],
            MEMBER["World Geodetic System 1984 (G873)"],
            MEMBER["World Geodetic System 1984 (G1150)"],
            MEMBER["World Geodetic System 1984 (G1674)"],
            MEMBER["World Geodetic System 1984 (G1762)"],
            MEMBER["World Geodetic System 1984 (G2139)"],
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]],
            ENSEMBLEACCURACY[2.0]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4326]],
    CONVERSION["Popular Visualisation Pseudo-Mercator",
        METHOD["Popular Visualisation Pseudo Mercator",
            ID["EPSG",1024]],
        PARAMETER["Latitude of natural origin",0,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",0,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["False easting",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["easting (X)",east,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["northing (Y)",north,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Web mapping and visualisation."],
        AREA["World between 85.06°S and 85.06°N."],
        BBOX[-85.06,-180,85.06,180]],
    ID["EPSG",3857]]

0.3 Explore the data visually.

Code
plot(ca_counties_sf |> select(land_km2))

Code
#plot in ggplot

ggplot() +
  geom_sf(data = ca_counties_sf, aes(fill = land_km2), color = 'white', size = 0.1) + 
  theme_void()+ #gets ride of lat and long on map
  scale_fill_gradientn(colors = c('cyan', 'blue', 'purple')) 

0.4 Read in records of Red Sesbania (invasive plant)

Code
sesbania_sf <- read_sf(here("data/red_sesbania/ds80_for_lab.gpkg")) |> clean_names()

sesbania_sf|> st_crs()  #different coordinate reference systems
Coordinate Reference System:
  User input: Custom 
  wkt:
PROJCRS["Custom",
    BASEGEOGCRS["NAD83",
        DATUM["North American Datum 1983",
            ELLIPSOID["GRS 1980",6378137,298.257222101,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["Degree",0.0174532925199433]],
        ID["EPSG",4269]],
    CONVERSION["unnamed",
        METHOD["Albers Equal Area",
            ID["EPSG",9822]],
        PARAMETER["Latitude of false origin",0,
            ANGLEUNIT["Degree",0.0174532925199433],
            ID["EPSG",8821]],
        PARAMETER["Longitude of false origin",-120,
            ANGLEUNIT["Degree",0.0174532925199433],
            ID["EPSG",8822]],
        PARAMETER["Latitude of 1st standard parallel",34,
            ANGLEUNIT["Degree",0.0174532925199433],
            ID["EPSG",8823]],
        PARAMETER["Latitude of 2nd standard parallel",40.5,
            ANGLEUNIT["Degree",0.0174532925199433],
            ID["EPSG",8824]],
        PARAMETER["Easting at false origin",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8826]],
        PARAMETER["Northing at false origin",-4000000,
            LENGTHUNIT["metre",1],
            ID["EPSG",8827]]],
    CS[Cartesian,2],
        AXIS["easting",east,
            ORDER[1],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]],
        AXIS["northing",north,
            ORDER[2],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]]]
Code
plot(sesbania_sf |> select(id))

0.5 The analysis.

Let’s find the count of red sesbania observed locations in this dataset, by county, and then create a map of all CA counties using the fill color to indicate red sesbania counts.

0.5.1 Pseudocode

  • match projections/coordinate reference systems to counties

  • combine datasets

  • plot datasets together

  • spatial_join - matches up dataset

  • group by county

  • then map by gradient of amount red sesbania counts

0.6 Transform the CRS so records match counties

Code
sesbania_3857_sf <- st_transform(sesbania_sf, 3857) #know EPSG code way

sesbania_3857_2_sf <- st_transform(sesbania_sf, st_crs(ca_counties_sf)) #find it from the other dataset

0.7 Let’s plot the two together.

Code
ggplot() +
  geom_sf(data = ca_counties_sf) +
  geom_sf(data = sesbania_3857_sf, size = 1, color = 'red')

0.7.1 Spatial Join

Code
ca_sesb_sf <- st_join(ca_counties_sf, sesbania_3857_sf) #kept polygons

sesb_ca_sf <- st_join(sesbania_3857_sf, ca_counties_sf) #kept points

#keeps geoms of whatever is first

0.8 Summarize by county.

Code
sesb_counts_sf <- ca_sesb_sf |>
  group_by(county) |>
  summarize(n_records = sum(!is.na(id))) #no not count N/A as a value, if I do not this then will count N/A as 1 instead of zero

ggplot()+
  geom_sf(data = sesb_counts_sf, aes(fill = n_records), color = 'grey90', size = 1)+ 
  scale_fill_gradientn(colors = c("grey", "green","blue"))+
  theme_minimal()+
  labs(fill = "Number of S. punicea records")

0.9 Next Analysis:

For the county with the greatest number of red sesbania records, make a map of those locations and where they occur within the county.

0.9.1 Pseudocode:

  • find the biggest county

  • filter/select the largest county

  • take subset of other locations information - watershed or location

  • color by count

  • map it!

  • maybe use other other spatual join dataset - sesb_ca_sf

Code
#can find the amax amount by just viewing dataset (do below for actual code)

county_max <- sesb_counts_sf |>
  filter(n_records == max(n_records)) |>
  pull(county)

#slice_max(n_records, 1) another option
Code
solano_sesb_sf <- sesb_ca_sf |>
  filter(county == county_max) #if have more than with the same # use %in% ---

solano_sf <- ca_counties_sf |>
  filter(county %in% county_max)

ggplot()+
  geom_sf(data = solano_sf)+
  geom_sf(data = solano_sesb_sf, color = 'red')

1 Making an interactive map with ‘tmap’

Code
# | eval: false #do not want to embed this into html otherwise will be 110 mb
#| include: true

##Set the viewing mode to interactive

tmap_mode(mode='view')
Code
tm_shape(ca_counties_sf) +
  tm_fill("land_km2", palette = "BuGn") +
  tm_shape(sesbania_sf) +
  tm_dots()